# -*- coding: utf-8 -*-
import requests
from lxml import etree
import openpyxl


def get_html(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36"
    }
    # The browser-like User-Agent was defined but never sent before; pass it
    # along so the request actually carries it.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error page instead of handing its HTML to the parser.
    response.raise_for_status()
    return response.text

def parse_content(content):
    """Extract the year / GDP / percentage table from HTML and save it.

    Each ``<tr>`` is processed on its own, so the three columns cannot drift
    out of alignment: a row contributes to the result only when it has text
    in its first three ``<td>`` cells and its first cell contains a purely
    numeric text node (checked with ``is_correct``). All other rows are
    skipped.

    The three resulting lists are printed and then passed to ``save_data``.

    Args:
        content: HTML markup as a string.

    Raises:
        ValueError: Propagated from ``extract`` / ``delete_char`` when an
            accepted row holds a GDP or percentage cell that cannot be
            converted to a number.
    """
    tree = etree.HTML(content)
    year_list, gdp_list, percents_list = [], [], []

    for row in tree.xpath("//tr"):
        year_texts = row.xpath("td[1]/text()")
        gdp_texts = row.xpath("td[2]/text()")
        percent_texts = row.xpath("td[3]/text()")

        # Rows without all three data cells are not data rows.
        if not (year_texts and gdp_texts and percent_texts):
            continue

        # Accept the row only if its first cell carries a numeric year.
        year_text = next((t for t in year_texts if is_correct(t)), None)
        if year_text is None:
            continue

        year_list.append(int(year_text))
        gdp_list.append(extract(gdp_texts[0]))
        percents_list.append(delete_char(percent_texts[0]))

    print(year_list)
    print(gdp_list)
    print(percents_list)
    save_data(year_list, gdp_list, percents_list)


def save_data(year_list, gdp_list, percents_list, filename="t4.xlsx"):
    """Write the three columns to an Excel workbook, one row per year.

    Args:
        year_list: Values for the first column; its length decides how many
            rows are written.
        gdp_list: Values for the second column.
        percents_list: Values for the third column.
        filename: Path of the workbook to create. Defaults to ``"t4.xlsx"``.

    Raises:
        IndexError: If ``gdp_list`` or ``percents_list`` has fewer entries
            than ``year_list``.
    """
    row_count = len(year_list)
    # Check up front so a short column is reported clearly and before any
    # workbook is created, rather than as a bare index failure mid-loop.
    if len(gdp_list) < row_count or len(percents_list) < row_count:
        raise IndexError(
            "gdp_list and percents_list must each have at least "
            "{} entries".format(row_count)
        )

    wk = openpyxl.Workbook()
    sheet = wk.active
    # zip stops at the shortest input, which is year_list after the check.
    for row in zip(year_list, gdp_list, percents_list):
        sheet.append(list(row))
    wk.save(filename)

def extract(s):
    """Return the integer written between the outermost parentheses of *s*.

    Thousands separators (``,``) inside the parentheses are removed before
    conversion, e.g. ``"21.43 (21,427,700,000,000)"`` -> ``21427700000000``.

    Args:
        s: Text containing a parenthesised, comma-grouped integer.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If *s* has no ``(``...``)`` pair in that order, or the
            enclosed text is not an integer.
    """
    start = s.find("(")
    end = s.rfind(")")
    # find/rfind return -1 when a parenthesis is missing; slicing with that
    # would silently chop characters and yield a wrong number.
    if start == -1 or end <= start:
        raise ValueError("no parenthesised number in {!r}".format(s))
    return int(s[start + 1:end].replace(",", ""))

def delete_char(s):
    """Drop every ``%`` sign from *s* and return the remainder as a float."""
    without_percent = "".join(s.split("%"))
    return float(without_percent)

def is_correct(s):
    """Tell whether *s*, ignoring surrounding whitespace, is all digits.

    Blank or whitespace-only input yields ``False``.
    """
    # str.isdigit() is already False for the empty string, so no separate
    # emptiness check is needed.
    return s.strip().isdigit()



if __name__ == '__main__':
    # Script entry point: download the yearly GDP page, then parse it.
    # parse_content also prints the extracted columns and saves them.
    page_html = get_html(
        "https://www.kylc.com/stats/global/yearly_per_country/g_gdp/usa.html"
    )
    parse_content(page_html)



